library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(p8105.datasets)

library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

Load the instacart dataset

# Load the `instacart` dataset into the workspace.
data("instacart")

# Display labels for the day-of-week codes, listed in code order (0 through 6).
# NOTE(review): assumes code 0 corresponds to Sunday -- confirm against the
# dataset documentation.
dow_labels <- c("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat")

# Recode `order_dow` as a factor whose levels follow the 0-6 codes, so that
# plots show the days in this order rather than alphabetically.
instacart <- mutate(
  instacart,
  order_dow = factor(order_dow, levels = 0:6, labels = dow_labels)
)
  1. Bar chart: number of items ordered in each aisle (aisles with more than 10,000 items ordered)
# Row counts per aisle, restricted to aisles with more than 10,000 rows and
# sorted from the largest count to the smallest.
instacart_bar_data <-
  instacart |>
  count(aisle, name = "count") |>
  filter(count > 10000) |>
  arrange(desc(count))

# Horizontal bar chart of the counts. `reorder()` orders the aisle axis by
# `count` instead of alphabetically.
instacart_bar_data |>
  plot_ly(
    x = ~count,
    y = ~reorder(aisle, count),
    type = "bar",
    orientation = "h"
  ) |>
  layout(
    title = "Number of Items Ordered by Aisle (>10,000)",
    xaxis = list(title = "Number of Items Ordered"),
    yaxis = list(title = "Aisle")
  )
  1. Box plot: distribution of order hour by day of week
# Box plot of `order_hour_of_day`, one box per `order_dow` level, with each
# box coloured by its day.
instacart |>
  # Keep only the two plotted columns before handing the data to plotly.
  select(order_dow, order_hour_of_day) |>
  plot_ly(
    x = ~order_dow,
    y = ~order_hour_of_day,
    color = ~order_dow,
    type = "box",
    colors = "viridis"
  ) |>
  layout(
    title = "Distribution of order time by day of week",
    xaxis = list(title = "Day of Week"),
    yaxis = list(title = "Hour of Day (0–23H)")
  )
  1. Scatter + line plot: orders started per hour
# Number of rows per hour of day, counting only rows where
# `add_to_cart_order == 1`.
# NOTE(review): presumably this keeps one row per order (its first item), so
# the count equals the number of orders -- confirm against the dataset docs.
instacart_line <-
  instacart |>
  filter(add_to_cart_order == 1) |>
  count(order_hour_of_day, name = "n_orders_started") |>
  # Keep rows in hour order so the line connects consecutive hours.
  arrange(order_hour_of_day)

# Scatter plot with connecting lines: hour of day against the hourly count.
instacart_line |>
  plot_ly(
    x = ~order_hour_of_day,
    y = ~n_orders_started,
    type = "scatter",
    mode = "markers+lines",
    marker = list(size = 8, opacity = 0.7)
  ) |>
  layout(
    title = "Orders Started per Hour",
    xaxis = list(title = "Hour of Day (0–23)"),
    yaxis = list(title = "Number of orders")
  )